Chapter 5 Community composition
5.1 Filter data
Filter samples with high host data
sample_metadata <- sample_metadata %>%
filter(!sample %in% c("EHI02721", "EHI02712", "EHI02700", "EHI02720", "EHI02749", "EHI02719", "EHI02729", "EHI02715", "EHI02722"))
genome_counts_filt <- genome_counts %>%
select(one_of(c("genome",sample_metadata$sample)))%>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
select_if(~!all(. == 0))
genome_counts <- genome_counts_filt
genome_metadata <- genome_metadata %>%
semi_join(., genome_counts_filt, by = "genome") %>%
arrange(match(genome,genome_counts_filt$genome))
genome_tree <- keep.tip(genome_tree, tip=genome_metadata$genome) # keep only MAG tips
#load("data/genome_gifts.Rdata")Make a phyloseq object
phylo_samples <- sample_metadata %>%
column_to_rownames("sample") %>%
sample_data() #convert to phyloseq sample_data object
phylo_genome <- genome_counts_filt %>%
column_to_rownames("genome") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>%
column_to_rownames("genome") %>%
as.matrix() %>%
tax_table() #convert to phyloseq tax_table object
phylo_tree <- phy_tree(genome_tree)
physeq_genome <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples,phylo_tree)
physeq_genome_clr <- microbiome::transform(physeq_genome, 'clr')5.3 Taxonomy overview
5.3.1 Stacked barplot
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_grid(~ diet, scale="free", space = "free")+
# facet_nested(. ~ region+diet, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 12, lineheight = 0.6),
strip.placement = "outside",
axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")Number of bacteria phyla
[1] 14
Bacteria phyla in wild individuals
[1] 14
Bacteria phyla captive animals
[1] 14
Bacteria phyla before grass is included in the diet
[1] 14
Bacteria phyla after grass is included in the diet
[1] 14
Number of Archaea phyla
[1] 1
Archaea phyla in wild individuals
[1] 0
Archaea phyla before grass is included in the diet
[1] "p__Methanobacteriota"
Archaea phyla after grass is included in the diet
[1] "p__Methanobacteriota"
5.3.2 Genus and species annotation
Number of MAGs without species-level annotation
[1] 749
| phylum | count_nospecies | count_total | percentage |
|---|---|---|---|
| p__Actinomycetota | 15 | 15 | 100.00000 |
| p__Bacillota | 52 | 53 | 98.11321 |
| p__Bacillota_A | 516 | 526 | 98.09886 |
| p__Bacillota_B | 2 | 2 | 100.00000 |
| p__Bacillota_C | 6 | 9 | 66.66667 |
| p__Bacteroidota | 43 | 127 | 33.85827 |
| p__Campylobacterota | 1 | 1 | 100.00000 |
| p__Cyanobacteriota | 6 | 7 | 85.71429 |
| p__Desulfobacterota | 12 | 12 | 100.00000 |
| p__Patescibacteria | 13 | 13 | 100.00000 |
| p__Pseudomonadota | 32 | 39 | 82.05128 |
| p__Spirochaetota | 2 | 2 | 100.00000 |
| p__Synergistota | 18 | 18 | 100.00000 |
| p__Verrucomicrobiota | 31 | 35 | 88.57143 |
Percentage of MAGs without species-level annotation
[1] 87.09302
Number of MAGs without genera-level annotation
79
5.3.3 Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,region, diet) %>%
summarise(relabun=sum(count))phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
geom_jitter(alpha=0.5) +
theme_minimal() +
theme(legend.position="none") +
labs(y="Phylum",x="Relative abundance")5.3.3.1 Origin: Wild vs Captive
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
Wild_mean=mean(relabun[diet=="Wild"]*100, na.rm=T),
Wild_sd=sd(relabun[diet=="Wild"]*100, na.rm=T),
Captive_mean=mean(relabun[diet=="Pre_grass"]*100, na.rm=T),
Captive_sd=sd(relabun[diet=="Pre_grass"]*100, na.rm=T)) %>%
mutate(total=str_c(round(total_mean,3),"±",round(total_sd,3)),
Wild=str_c(round(Wild_mean,3),"±",round(Wild_sd,3)),
Captive=str_c(round(Captive_mean,3),"±",round(Captive_sd,3))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,total,Wild,Captive)# A tibble: 15 × 4
phylum total Wild Captive
<chr> <chr> <chr> <chr>
1 p__Bacillota_A 57.243±12.618 48.636±13.52 59.846±10.725
2 p__Bacteroidota 25.787±10.113 26.889±11.805 24.986±10.083
3 p__Bacillota 4.118±7.202 5.066±11.152 4.949±5.297
4 p__Spirochaetota 3.911±10.011 11.731±14.792 0.001±0.001
5 p__Verrucomicrobiota 2.264±5.722 1.472±0.715 1.449±1.295
6 p__Pseudomonadota 1.626±3.328 0.511±0.397 3.082±5.382
7 p__Patescibacteria 0.987±1.542 0.212±0.253 2.177±2.223
8 p__Synergistota 0.96±0.852 1.865±0.83 0.45±0.39
9 p__Bacillota_C 0.844±0.795 1.696±0.674 0.386±0.36
10 p__Actinomycetota 0.78±0.985 0.427±0.459 1.217±1.147
11 p__Cyanobacteriota 0.708±2.081 0.277±0.614 0.916±3.024
12 p__Desulfobacterota 0.586±0.396 0.991±0.315 0.41±0.256
13 p__Bacillota_B 0.104±0.145 0.228±0.203 0.042±0.024
14 p__Methanobacteriota 0.082±0.217 0±0 0.09±0.228
15 p__Campylobacterota 0±0 0±0 0±0
5.3.3.2 Origin and diet
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
Wild_mean=mean(relabun[diet=="Wild"]*100, na.rm=T),
Wild_sd=sd(relabun[diet=="Wild"]*100, na.rm=T),
Pre_grass_mean=mean(relabun[diet=="Pre_grass"]*100, na.rm=T),
Pre_grass_sd=sd(relabun[diet=="Pre_grass"]*100, na.rm=T),
Post_grass_mean=mean(relabun[diet=="Post_grass"]*100, na.rm=T),
Post_grass_sd=sd(relabun[diet=="Post_grass"]*100, na.rm=T)) %>%
mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2)),
Wild=str_c(round(Wild_mean,2),"±",round(Wild_sd,2)),
Pre_grass=str_c(round(Pre_grass_mean,6),"±",round(Pre_grass_sd,6)),
Post_grass=str_c(round(Post_grass_mean,2),"±",round(Post_grass_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,total,Wild,Pre_grass,Post_grass)# A tibble: 15 × 5
phylum total Wild Pre_grass Post_grass
<chr> <chr> <chr> <chr> <chr>
1 p__Bacillota_A 57.24±12.62 48.64±13.52 59.845932±10.725422 63.25±9.01
2 p__Bacteroidota 25.79±10.11 26.89±11.81 24.986055±10.082621 25.49±9.07
3 p__Bacillota 4.12±7.2 5.07±11.15 4.94862±5.296738 2.34±2.73
4 p__Spirochaetota 3.91±10.01 11.73±14.79 0.00063±0.000727 0±0
5 p__Verrucomicrobiota 2.26±5.72 1.47±0.71 1.448674±1.294628 3.87±9.89
6 p__Pseudomonadota 1.63±3.33 0.51±0.4 3.082488±5.382343 1.29±1.52
7 p__Patescibacteria 0.99±1.54 0.21±0.25 2.176993±2.223404 0.57±0.41
8 p__Synergistota 0.96±0.85 1.86±0.83 0.449802±0.390105 0.57±0.35
9 p__Bacillota_C 0.84±0.79 1.7±0.67 0.385614±0.359883 0.45±0.49
10 p__Actinomycetota 0.78±0.99 0.43±0.46 1.217389±1.147372 0.7±1.1
11 p__Cyanobacteriota 0.71±2.08 0.28±0.61 0.915635±3.024426 0.93±1.99
12 p__Desulfobacterota 0.59±0.4 0.99±0.31 0.410146±0.255958 0.36±0.25
13 p__Bacillota_B 0.1±0.15 0.23±0.2 0.041532±0.023962 0.04±0.02
14 p__Methanobacteriota 0.08±0.22 0±0 0.090366±0.227643 0.15±0.29
15 p__Campylobacterota 0±0 0±0 0.000127±0.000226 0±0
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(phylum) %>%
pull()
phylum_summary %>%
left_join(genome_metadata %>% select(phylum,phylum) %>% unique(),by=join_by(phylum==phylum)) %>%
# left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(phylum %in% phylum_arrange[1:20]) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~diet)+
theme_minimal() +
labs(y="phylum", x="Relative abundance", color="Phylum")5.4 Taxonomy boxplot
5.4.1 Family
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family, diet,region) %>%
summarise(relabun=sum(count))
family_summary$diet <- factor(family_summary$diet, levels=c("Pre_grass", "Post_grass", "Wild"))5.4.1.1 Origin: Wild vs Captive
family_summary %>%
group_by(family) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
Wild_mean=mean(relabun[diet=="Wild"]*100, na.rm=T),
Wild_sd=sd(relabun[diet=="Wild"]*100, na.rm=T),
Cap_mean=mean(relabun[region=="Nafarroa"]*100, na.rm=T),
Cap_sd=sd(relabun[region=="Nafarroa"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,2),"±",round(total_sd,2)),
Wild=str_c(round(Wild_mean,2),"±",round(Wild_sd,2)),
Captive=str_c(round(Cap_mean,2),"±",round(Cap_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(family,Total,Wild,Captive) %>%
paged_table()5.4.1.2 Origin and Diet
family_summary %>%
group_by(family) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
Wild_mean=mean(relabun[diet=="Wild"]*100, na.rm=T),
Wild_sd=sd(relabun[diet=="Wild"]*100, na.rm=T),
Pre_grass_mean=mean(relabun[diet=="Pre_grass"]*100, na.rm=T),
Pre_grass_sd=sd(relabun[diet=="Pre_grass"]*100, na.rm=T),
Post_grass_mean=mean(relabun[diet=="Post_grass"]*100, na.rm=T),
Post_grass_sd=sd(relabun[diet=="Post_grass"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,2),"±",round(total_sd,2)),
Wild=str_c(round(Wild_mean,2),"±",round(Wild_sd,2)),
Pre_grass=str_c(round(Pre_grass_mean,2),"±",round(Pre_grass_sd,2)),
Post_grass=str_c(round(Post_grass_mean,2),"±",round(Post_grass_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(family,Total,Wild,Pre_grass,Post_grass) %>%
paged_table()family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~diet)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")5.4.2 Genus
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,phylum,genus, diet) %>%
summarise(relabun=sum(count)) %>%
filter(genus != "g__") %>%
mutate(genus= sub("^g__", "", genus))
genus_summary$diet <- factor(genus_summary$diet, levels=c("Pre_grass", "Post_grass", "Wild"))5.4.3 origin and diet
genus_summary %>%
group_by(genus) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
Wild_mean=mean(relabun[diet=="Wild"]*100, na.rm=T),
Wild_sd=sd(relabun[diet=="Wild"]*100, na.rm=T),
Pre_grass_mean=mean(relabun[diet=="Pre_grass"]*100, na.rm=T),
Pre_grass_sd=sd(relabun[diet=="Pre_grass"]*100, na.rm=T),
Post_grass_mean=mean(relabun[diet=="Post_grass"]*100, na.rm=T),
Post_grass_sd=sd(relabun[diet=="Post_grass"]*100, na.rm=T)) %>%
mutate(Total=str_c(round(total_mean,2),"±",round(total_sd,2)),
Wild=str_c(round(Wild_mean,2),"±",round(Wild_sd,2)),
Pre_grass=str_c(round(Pre_grass_mean,2),"±",round(Pre_grass_sd,2)),
Post_grass=str_c(round(Post_grass_mean,2),"±",round(Post_grass_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(genus,Total,Wild,Pre_grass,Post_grass) %>%
paged_table()genus_arrange <- genus_summary %>%
group_by(genus) %>%
summarise(mean=sum(relabun)) %>%
filter(genus != "g__")%>%
arrange(-mean) %>%
select(genus) %>%
mutate(genus= sub("^g__", "", genus)) %>%
pull()
genus_summary_sort <- genus_summary %>%
group_by(genus) %>%
summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
arrange(-mean)
genus_summary %>%
mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
geom_jitter(alpha=0.5) +
facet_grid(.~diet)+
theme_minimal() +
theme(axis.text.y = element_text(size=6))+
labs(y="Family", x="Relative abundance", color="Phylum")